{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "execution": {
     "iopub.execute_input": "2023-11-07T17:49:57.862553Z",
     "iopub.status.busy": "2023-11-07T17:49:57.862129Z",
     "iopub.status.idle": "2023-11-07T17:49:58.203809Z",
     "shell.execute_reply": "2023-11-07T17:49:58.202877Z",
     "shell.execute_reply.started": "2023-11-07T17:49:57.862518Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:49:58.852921Z",
     "iopub.status.busy": "2023-11-07T17:49:58.851950Z",
     "iopub.status.idle": "2023-11-07T17:50:02.333118Z",
     "shell.execute_reply": "2023-11-07T17:50:02.332097Z",
     "shell.execute_reply.started": "2023-11-07T17:49:58.852884Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "from torch.utils.data import Dataset, DataLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:02.335369Z",
     "iopub.status.busy": "2023-11-07T17:50:02.334873Z",
     "iopub.status.idle": "2023-11-07T17:50:04.208009Z",
     "shell.execute_reply": "2023-11-07T17:50:04.207235Z",
     "shell.execute_reply.started": "2023-11-07T17:50:02.335341Z"
    }
   },
   "outputs": [],
   "source": [
    "import transformers\n",
    "from transformers import GPT2Tokenizer, BertTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:04.209571Z",
     "iopub.status.busy": "2023-11-07T17:50:04.209117Z",
     "iopub.status.idle": "2023-11-07T17:50:05.191586Z",
     "shell.execute_reply": "2023-11-07T17:50:05.190718Z",
     "shell.execute_reply.started": "2023-11-07T17:50:04.209543Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:05.193851Z",
     "iopub.status.busy": "2023-11-07T17:50:05.193561Z",
     "iopub.status.idle": "2023-11-07T17:50:05.198757Z",
     "shell.execute_reply": "2023-11-07T17:50:05.197744Z",
     "shell.execute_reply.started": "2023-11-07T17:50:05.193825Z"
    }
   },
   "outputs": [],
   "source": [
    "def free_memory():\n",
    "    \"\"\"Run the garbage collector, then release PyTorch's cached CUDA memory.\"\"\"\n",
    "    # Collect first so memory held by unreachable Python objects is freed\n",
    "    # before the CUDA cache is emptied.\n",
    "    gc.collect()\n",
    "    torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:05.200332Z",
     "iopub.status.busy": "2023-11-07T17:50:05.199968Z",
     "iopub.status.idle": "2023-11-07T17:50:05.210819Z",
     "shell.execute_reply": "2023-11-07T17:50:05.209775Z",
     "shell.execute_reply.started": "2023-11-07T17:50:05.200305Z"
    }
   },
   "outputs": [],
   "source": [
    "# Class index for each sentiment label: negative -> 0, neutral -> 1, positive -> 2.\n",
    "sentiment_map = {\n",
    "    label: index\n",
    "    for index, label in enumerate(('negative', 'neutral', 'positive'))\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:05.212023Z",
     "iopub.status.busy": "2023-11-07T17:50:05.211793Z",
     "iopub.status.idle": "2023-11-07T17:50:05.220920Z",
     "shell.execute_reply": "2023-11-07T17:50:05.220112Z",
     "shell.execute_reply.started": "2023-11-07T17:50:05.212002Z"
    }
   },
   "outputs": [],
   "source": [
    "# Identifiers handed to `from_pretrained` to obtain the two tokenizers.\n",
    "sber_tokenizer_path = 'sberbank-ai/rugpt3small_based_on_gpt2'\n",
    "rubert_tokenizer_path = 'DeepPavlov/rubert-base-cased'\n",
    "\n",
    "# Saved GPT-based sentiment classifier checkpoints.\n",
    "# NOTE(review): the 'lt_' / 'st_' prefixes are not explained in this notebook - TODO document.\n",
    "lt_model_path = 'models/lt_sberbank-gpt-sentiment-classifier.pth'\n",
    "st_model_path = 'models/st_sberbank-gpt-sentiment-classifier.pth'\n",
    "st_medium_model_path = 'models/st_sberbank-gpt-sentiment-classifier_medium.pth'\n",
    "\n",
    "# Saved RuBERT-based sentiment classifier checkpoints.\n",
    "lt_rubert_path = 'models/lt_rubert-sentiment-classifier.pth'\n",
    "st_rubert_path = 'models/st_rubert-sentiment-classifier.pth'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:05.222044Z",
     "iopub.status.busy": "2023-11-07T17:50:05.221819Z",
     "iopub.status.idle": "2023-11-07T17:50:05.261849Z",
     "shell.execute_reply": "2023-11-07T17:50:05.260990Z",
     "shell.execute_reply.started": "2023-11-07T17:50:05.222024Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'cuda'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.\n",
    "if torch.cuda.is_available():\n",
    "    device = 'cuda'\n",
    "else:\n",
    "    device = 'cpu'\n",
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:50:05.263391Z",
     "iopub.status.busy": "2023-11-07T17:50:05.263128Z",
     "iopub.status.idle": "2023-11-07T17:51:02.724807Z",
     "shell.execute_reply": "2023-11-07T17:51:02.723726Z",
     "shell.execute_reply.started": "2023-11-07T17:50:05.263370Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0b5bad189fd64424827955d3e382eecb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.71M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4b7da534b47a44628cc63fda5643bfc0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)olve/main/merges.txt:   0%|          | 0.00/1.27M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ebb595a82f134e29a41d1a18d7d73d76",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)lve/main/config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f4281289c0047b387f44fc7a741c56b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e6fc8164290b4c1d87d04b52de24878e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e12f22a69e1d446f91022c29d0ba0f47",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)okenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ff29156526834a80978c0e70b74e9dac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)lve/main/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def _load_model(path):\n",
    "    \"\"\"Unpickle the whole model object stored at `path` and move it to `device`.\n",
    "\n",
    "    `weights_only=False` is passed explicitly: the checkpoints are used as\n",
    "    complete objects (`.to(device)` is called on the result), and PyTorch 2.6+\n",
    "    defaults `torch.load` to `weights_only=True`, which rejects such files.\n",
    "\n",
    "    NOTE(review): this goes through pickle, which can execute arbitrary code;\n",
    "    only load checkpoint files that come from a trusted source.\n",
    "    \"\"\"\n",
    "    return torch.load(path, map_location=device, weights_only=False).to(device)\n",
    "\n",
    "\n",
    "# Tokenizers are fetched by identifier; the classifiers come from local checkpoints.\n",
    "tokenizer = GPT2Tokenizer.from_pretrained(sber_tokenizer_path)\n",
    "rubert_tokenizer = BertTokenizer.from_pretrained(rubert_tokenizer_path)\n",
    "\n",
    "lt_model = _load_model(lt_model_path)\n",
    "st_model = _load_model(st_model_path)\n",
    "st_medium_model = _load_model(st_medium_model_path)\n",
    "st_rubert = _load_model(st_rubert_path)\n",
    "lt_rubert = _load_model(lt_rubert_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:02.728095Z",
     "iopub.status.busy": "2023-11-07T17:51:02.727768Z",
     "iopub.status.idle": "2023-11-07T17:51:03.442549Z",
     "shell.execute_reply": "2023-11-07T17:51:03.441611Z",
     "shell.execute_reply.started": "2023-11-07T17:51:02.728051Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>review</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>качество плохое пошив ужасный (горловина напер...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Товар отдали другому человеку, я не получила п...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Ужасная синтетика! Тонкая, ничего общего с пре...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>товар не пришел, продавец продлил защиту без м...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Кофточка голая синтетика, носить не возможно.</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Очень глубокие проймы</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Я недовольна заказом.Я вот одного не понимаю п...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>заказала размер s на от 64,об 94,начнем с того...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Заказ я сделала в июле. С тех пор посылка отсл...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Ужасное качество товара!</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              review sentiment\n",
       "0  качество плохое пошив ужасный (горловина напер...  negative\n",
       "1  Товар отдали другому человеку, я не получила п...  negative\n",
       "2  Ужасная синтетика! Тонкая, ничего общего с пре...  negative\n",
       "3  товар не пришел, продавец продлил защиту без м...  negative\n",
       "4      Кофточка голая синтетика, носить не возможно.  negative\n",
       "5                              Очень глубокие проймы  negative\n",
       "6  Я недовольна заказом.Я вот одного не понимаю п...  negative\n",
       "7  заказала размер s на от 64,об 94,начнем с того...  negative\n",
       "8  Заказ я сделала в июле. С тех пор посылка отсл...  negative\n",
       "9                           Ужасное качество товара!  negative"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the tab-separated reviews and keep one row per distinct review text\n",
    "# (the first occurrence wins; the original row labels are kept).\n",
    "reviews = (\n",
    "    pd.read_csv('data/reviews.csv.zip', sep='\\t')\n",
    "    .drop_duplicates(subset=['review'])\n",
    ")\n",
    "reviews.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.447291Z",
     "iopub.status.busy": "2023-11-07T17:51:03.446988Z",
     "iopub.status.idle": "2023-11-07T17:51:03.461431Z",
     "shell.execute_reply": "2023-11-07T17:51:03.460390Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.447265Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>review</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>45140</th>\n",
       "      <td>Ехала долго посылка, больше полутора месяца. Д...</td>\n",
       "      <td>neautral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82647</th>\n",
       "      <td>Очень красивый и приятный на ощупь))</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39454</th>\n",
       "      <td>нитки торчат, прошито очень неаккуратно</td>\n",
       "      <td>neautral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71219</th>\n",
       "      <td>Доставка быстрая. По размеру подошло 46 р-р. Т...</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45132</th>\n",
       "      <td>Пахнет</td>\n",
       "      <td>neautral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72801</th>\n",
       "      <td>Отлично всё - пошив, качество, фасон. Брала сп...</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67107</th>\n",
       "      <td>Пришли за три недели, на 39 идеально!!! Понрав...</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1487</th>\n",
       "      <td>Плохое качество и велик размер S. Хотя у меня ...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9131</th>\n",
       "      <td>Прошло 4 месяца с продом.продлевали защиту тов...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3752</th>\n",
       "      <td>Товар так и не пришел! Пришел подарок от продо...</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  review sentiment\n",
       "45140  Ехала долго посылка, больше полутора месяца. Д...  neautral\n",
       "82647              Очень красивый и приятный на ощупь))   positive\n",
       "39454            нитки торчат, прошито очень неаккуратно  neautral\n",
       "71219  Доставка быстрая. По размеру подошло 46 р-р. Т...  positive\n",
       "45132                                            Пахнет   neautral\n",
       "72801  Отлично всё - пошив, качество, фасон. Брала сп...  positive\n",
       "67107  Пришли за три недели, на 39 идеально!!! Понрав...  positive\n",
       "1487   Плохое качество и велик размер S. Хотя у меня ...  negative\n",
       "9131   Прошло 4 месяца с продом.продлевали защиту тов...  negative\n",
       "3752   Товар так и не пришел! Пришел подарок от продо...  negative"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fixed-seed random sample of 20% of the rows in `reviews`.\n",
    "reviews_test_sample = reviews.sample(random_state=42, frac=0.2)\n",
    "reviews_test_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.464614Z",
     "iopub.status.busy": "2023-11-07T17:51:03.464342Z",
     "iopub.status.idle": "2023-11-07T17:51:03.654133Z",
     "shell.execute_reply": "2023-11-07T17:51:03.652992Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.464590Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjgAAAGdCAYAAAAfTAk2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAw7klEQVR4nO3deXBUZb7/8U8CSScBOpEtIRIwNSgmCrIJNLgghjSQoVDRKxIFleXCDSKkEIZ7EcM2UUY2EUVHJOCFERxXFiFtFBAJWyCAoIiKE++FJFe2sHaapH9/WDk/mk0aOgYe368qqjjnPOd7nnOqn+5PztId5PV6vQIAADBIcFV3AAAAINAIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA41Sv6g5UlvLycu3fv1+1atVSUFBQVXcHAABcBq/Xq2PHjik2NlbBwVd+HsbYgLN//37FxcVVdTcAAMAV+Pnnn9WwYcMrXt/YgFOrVi1Jvx4gu90esLoej0fZ2dlKTk5WSEhIwOoCuHyMQ6BqVeYYLCkpUVxcnPU5fqWMDTgVl6XsdnvAA05ERITsdjtvrEAVYRwCVev3GINXe3sJNxkDAADjEHAAAIBxCDgAAMA4fgec//3f/9Xjjz+uOnXqKDw8XM2aNdOWLVus5V6vV+PGjVODBg0UHh6upKQk7d2716fGoUOHlJqaKrvdrqioKPXv31/Hjx/3abNjxw7dfffdCgsLU1xcnKZMmXKFuwgAAP5o/Ao4hw8fVseOHRUSEqJPP/1Uu3fv1tSpU3XDDTdYbaZMmaJXXnlFc+bM0caNG1WjRg05nU6dPn3aapOamqpdu3bJ5XJp2bJlWrt2rQYNGmQtLykpUXJysho3bqy8vDz97W9/U0ZGht58880A7DIAADCdX09RvfTSS4qLi9O8efOsefHx8db/vV6vZsyYobFjx6pnz56SpAULFig6OlofffSRevfurW+++UYrV67U5s2b1aZNG0nSrFmz1L17d7388suKjY3VwoULVVpaqrfffluhoaG67bbblJ+fr2nTpvkEIQAAgAvxK+B88skncjqdeuSRR7RmzRrdeOON+o//+A8NHDhQkrRv3z4VFhYqKSnJWicyMlLt2rVTbm6uevfurdzcXEVFRVnhRpKSkpIUHBysjRs36sEHH1Rubq7uuecehYaGWm2cTqdeeuklHT582OeMUQW32y23221Nl5SUSPr1UTaPx+PPbl5SRa1A1gTgH8YhULUqcwwGqqZfAefHH3/U66+/rvT0dP3nf/6nNm/erGHDhik0NFT9+vVTYWGhJCk6OtpnvejoaGtZYWGh6tev79uJ6tVVu3ZtnzZnnxk6u2ZhYeEFA05mZqbGjx9/3vzs7GxFRET4s5uXxeVyBbwmAP8wDoGqVRlj8OTJkwGp41fAKS8vV5s2bfTXv/5VktSyZUt9/fXXmjNnjvr16xeQDl2pMWPGKD093Zqu+CbE5OTkgH/Rn8vlUpcuXfiCMaCKMA6BqlWZY7DiCszV8ivgNGjQQImJiT7zEhIS9P7770uSYmJiJElFRUVq0KCB1aaoqEgtWrSw2hQXF/vUOHPmjA4dOmStHxMTo6KiIp82FdMVbc5ls9lks9nOmx8SElIpb4CVVRfA5WMcAlWrMsZgoOr59RRVx44dtWfPHp953333nRo3bizp1xuOY2JilJOTYy0vKSnRxo0b5XA4JEkOh0NHjhxRXl6e1ebzzz9XeXm52rVrZ7VZu3atz3U4l8ulpk2bXvDyFAAAwNn8CjgjRozQhg0b9Ne//lXff/+9Fi1apDfffFNpaWmSfv3diOHDh2vSpEn65JNPtHPnTvXt21exsbF64IEHJP16xqdr164aOHCgNm3apK+++kpDhw5V7969FRsbK0nq06ePQkND1b9/f+3atUuLFy/WzJkzfS5BAQAAXIxfl6juvPNOffjhhxozZowmTJig
+Ph4zZgxQ6mpqVabUaNG6cSJExo0aJCOHDmiu+66SytXrlRYWJjVZuHChRo6dKjuv/9+BQcHq1evXnrllVes5ZGRkcrOzlZaWppat26tunXraty4cTwiDgAALkuQ1+v1VnUnKkNJSYkiIyN19OjRgN9kvGLFCnXv3p1r/0AVYRwCVasyx2CgPr/9OoMDANeS2zNWyV0WVNXduGw/vZhS1V0A/jD4sU0AAGAcAg4AADAOl6iuEKfGAQC4dnEGBwAAGIeAAwAAjEPAAQAAxiHgAAAA43CTMQAAVeymvyyv6i74xVbNqyltq7oXl8YZHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4fgWcjIwMBQUF+fy79dZbreWnT59WWlqa6tSpo5o1a6pXr14qKiryqVFQUKCUlBRFRESofv36eu6553TmzBmfNqtXr1arVq1ks9nUpEkTZWVlXfkeAgCAPxy/z+DcdtttOnDggPVv3bp11rIRI0Zo6dKleu+997RmzRrt379fDz30kLW8rKxMKSkpKi0t1fr16zV//nxlZWVp3LhxVpt9+/YpJSVF9913n/Lz8zV8+HANGDBAq1atuspdBQAAfxTV/V6henXFxMScN//o0aOaO3euFi1apM6dO0uS5s2bp4SEBG3YsEHt27dXdna2du/erc8++0zR0dFq0aKFJk6cqNGjRysjI0OhoaGaM2eO4uPjNXXqVElSQkKC1q1bp+nTp8vpdF7l7gIAgD8CvwPO3r17FRsbq7CwMDkcDmVmZqpRo0bKy8uTx+NRUlKS1fbWW29Vo0aNlJubq/bt2ys3N1fNmjVTdHS01cbpdGrIkCHatWuXWrZsqdzcXJ8aFW2GDx9+yX653W653W5ruqSkRJLk8Xjk8Xj83c2LqqhlC/YGrObvIZDHAKhqjEOYxlbt+notV4y9ynhNB6qmXwGnXbt2ysrKUtOmTXXgwAGNHz9ed999t77++msVFhYqNDRUUVFRPutER0ersLBQklRYWOgTbiqWVyy7VJuSkhKdOnVK4eHhF+xbZmamxo8ff9787OxsRURE+LObl2Vim/KA16xMK1asqOouAAHHOIQpprSt6h5cGZfLFfCaJ0+eDEgdvwJOt27drP83b95c7dq1U+PGjbVkyZKLBo/fy5gxY5Senm5Nl5SUKC4uTsnJybLb7QHbjsfjkcvl0vNbguUuDwpY3cr2dQaX92AOxiFMc3vG9XWfqS3Yq4ltytWlSxeFhIQEtHbFFZir5fclqrNFRUXplltu0ffff68uXbqotLRUR44c8TmLU1RUZN2zExMTo02bNvnUqHjK6uw25z55VVRUJLvdfskQZbPZZLPZzpsfEhIS8IMvSe7yILnLrp831so4BkBVYxzCFNfT6/hslfEZG6h6V/U9OMePH9cPP/ygBg0aqHXr1goJCVFOTo61fM+ePSooKJDD4ZAkORwO7dy5U8XFxVYbl8slu92uxMREq83ZNSraVNQAAAD4LX4FnJEjR2rNmjX66aeftH79ej344IOqVq2aHnvsMUVGRqp///5KT0/XF198oby8PD311FNyOBxq3769JCk5OVmJiYl64okntH37dq1atUpjx45VWlqadfZl8ODB+vHHHzVq1Ch9++23eu2117RkyRKNGDEi8HsPAACM5Nclqv/5n//RY489poMHD6pevXq66667tGHDBtWrV0+SNH36dAUHB6tXr15y
u91yOp167bXXrPWrVaumZcuWaciQIXI4HKpRo4b69eunCRMmWG3i4+O1fPlyjRgxQjNnzlTDhg311ltv8Yg4AAC4bH4FnHffffeSy8PCwjR79mzNnj37om0aN278m08SdOrUSdu2bfOnawAAABZ+iwoAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABjnqgLOiy++qKCgIA0fPtyad/r0aaWlpalOnTqqWbOmevXqpaKiIp/1CgoKlJKSooiICNWvX1/PPfeczpw549Nm9erVatWqlWw2m5o0aaKsrKyr6SoAAPgDueKAs3nzZr3xxhtq3ry5z/wRI0Zo6dKleu+997RmzRrt379fDz30kLW8rKxMKSkpKi0t1fr16zV//nxlZWVp3LhxVpt9+/YpJSVF9913n/Lz8zV8+HANGDBAq1atutLuAgCAP5ArCjjHjx9Xamqq/v73v+uGG26w5h89elRz587VtGnT1LlzZ7Vu3Vrz5s3T+vXrtWHDBklSdna2du/erf/+7/9WixYt1K1bN02cOFGzZ89WaWmpJGnOnDmKj4/X1KlTlZCQoKFDh+rhhx/W9OnTA7DLAADAdNWvZKW0tDSlpKQoKSlJkyZNsubn5eXJ4/EoKSnJmnfrrbeqUaNGys3NVfv27ZWbm6tmzZopOjraauN0OjVkyBDt2rVLLVu2VG5urk+NijZnXwo7l9vtltvttqZLSkokSR6PRx6P50p284IqatmCvQGr+XsI5DEAqhrjEKaxVbu+XssVY68yXtOBqul3wHn33Xe1detWbd68+bxlhYWFCg0NVVRUlM/86OhoFRYWWm3ODjcVyyuWXapNSUmJTp06pfDw8PO2nZmZqfHjx583Pzs7WxEREZe/g5dpYpvygNesTCtWrKjqLgABxziEKaa0reoeXBmXyxXwmidPngxIHb8Czs8//6xnn31WLpdLYWFhAelAoIwZM0bp6enWdElJieLi4pScnCy73R6w7Xg8HrlcLj2/JVju8qCA1a1sX2c4q7oLQMAwDmGa2zOur3tMbcFeTWxTri5duigkJCSgtSuuwFwtvwJOXl6eiouL1apVK2teWVmZ1q5dq1dffVWrVq1SaWmpjhw54nMWp6ioSDExMZKkmJgYbdq0yaduxVNWZ7c598mroqIi2e32C569kSSbzSabzXbe/JCQkIAffElylwfJXXb9vLFWxjEAqhrjEKa4nl7HZ6uMz9hA1fPrJuP7779fO3fuVH5+vvWvTZs2Sk1Ntf4fEhKinJwca509e/aooKBADodDkuRwOLRz504VFxdbbVwul+x2uxITE602Z9eoaFNRAwAA4FL8OoNTq1Yt3X777T7zatSooTp16ljz+/fvr/T0dNWuXVt2u13PPPOMHA6H2rdvL0lKTk5WYmKinnjiCU2ZMkWFhYUaO3as0tLSrDMwgwcP1quvvqpRo0bp6aef1ueff64lS5Zo+fLlgdhnAABguCt6iupSpk+fruDgYPXq1Utut1tOp1OvvfaatbxatWpatmyZhgwZIofDoRo1aqhfv36aMGGC1SY+Pl7Lly/XiBEjNHPmTDVs2FBvvfWWnE6uXwMAgN921QFn9erVPtNhYWGaPXu2Zs+efdF1Gjdu/JtPE3Tq1Enbtm272u4BAIA/IH6L
CgAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMI5fAef1119X8+bNZbfbZbfb5XA49Omnn1rLT58+rbS0NNWpU0c1a9ZUr169VFRU5FOjoKBAKSkpioiIUP369fXcc8/pzJkzPm1Wr16tVq1ayWazqUmTJsrKyrryPQQAAH84fgWchg0b6sUXX1ReXp62bNmizp07q2fPntq1a5ckacSIEVq6dKnee+89rVmzRvv379dDDz1krV9WVqaUlBSVlpZq/fr1mj9/vrKysjRu3Dirzb59+5SSkqL77rtP+fn5Gj58uAYMGKBVq1YFaJcBAIDpqvvTuEePHj7TkydP1uuvv64NGzaoYcOGmjt3rhYtWqTOnTtLkubNm6eEhARt2LBB7du3V3Z2tnbv3q3PPvtM0dHRatGihSZOnKjRo0crIyNDoaGhmjNnjuLj4zV16lRJUkJCgtatW6fp06fL6XQGaLcBAIDJrvgenLKyMr377rs6ceKEHA6H8vLy5PF4lJSUZLW59dZb1ahRI+Xm5kqScnNz1axZM0VHR1ttnE6nSkpKrLNAubm5PjUq2lTUAAAA+C1+ncGRpJ07d8rhcOj06dOqWbOmPvzwQyUmJio/P1+hoaGKioryaR8dHa3CwkJJUmFhoU+4qVhesexSbUpKSnTq1CmFh4dfsF9ut1tut9uaLikpkSR5PB55PB5/d/OiKmrZgr0Bq/l7COQxAKoa4xCmsVW7vl7LFWOvMl7Tgarpd8Bp2rSp8vPzdfToUf3zn/9Uv379tGbNmoB05mpkZmZq/Pjx583Pzs5WREREwLc3sU15wGtWphUrVlR1F4CAYxzCFFPaVnUProzL5Qp4zZMnTwakjt8BJzQ0VE2aNJEktW7dWps3b9bMmTP16KOPqrS0VEeOHPE5i1NUVKSYmBhJUkxMjDZt2uRTr+Ipq7PbnPvkVVFRkex2+0XP3kjSmDFjlJ6ebk2XlJQoLi5OycnJstvt/u7mRXk8HrlcLj2/JVju8qCA1a1sX2dw/xLMwTiEaW7PuL4epLEFezWxTbm6dOmikJCQgNauuAJztfwOOOcqLy+X2+1W69atFRISopycHPXq1UuStGfPHhUUFMjhcEiSHA6HJk+erOLiYtWvX1/Sr+nPbrcrMTHRanPuXzkul8uqcTE2m002m+28+SEhIQE/+JLkLg+Su+z6eWOtjGMAVDXGIUxxPb2Oz1YZn7GBqudXwBkzZoy6deumRo0a6dixY1q0aJFWr16tVatWKTIyUv3791d6erpq164tu92uZ555Rg6HQ+3bt5ckJScnKzExUU888YSmTJmiwsJCjR07VmlpaVY4GTx4sF599VWNGjVKTz/9tD7//HMtWbJEy5cvD8gOAwAA8/kVcIqLi9W3b18dOHBAkZGRat68uVatWqUuXbpIkqZPn67g4GD16tVLbrdbTqdTr732mrV+tWrVtGzZMg0ZMkQOh0M1atRQv379NGHCBKtNfHy8li9frhEjRmjmzJlq2LCh3nrrLR4RBwAAl82vgDN37txLLg8LC9Ps2bM1e/bsi7Zp3Ljxb95o16lTJ23bts2frgEAAFj4LSoAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4B
BwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcvwJOZmam7rzzTtWqVUv169fXAw88oD179vi0OX36tNLS0lSnTh3VrFlTvXr1UlFRkU+bgoICpaSkKCIiQvXr19dzzz2nM2fO+LRZvXq1WrVqJZvNpiZNmigrK+vK9hAAAPzh+BVw1qxZo7S0NG3YsEEul0sej0fJyck6ceKE1WbEiBFaunSp3nvvPa1Zs0b79+/XQw89ZC0vKytTSkqKSktLtX79es2fP19ZWVkaN26c1Wbfvn1KSUnRfffdp/z8fA0fPlwDBgzQqlWrArDLAADAdNX9abxy5Uqf6aysLNWvX195eXm65557dPToUc2dO1eLFi1S586dJUnz5s1TQkKCNmzYoPbt2ys7O1u7d+/WZ599pujoaLVo0UITJ07U6NGjlZGRodDQUM2ZM0fx8fGaOnWqJCkhIUHr1q3T9OnT5XQ6A7TrAADAVH4FnHMdPXpUklS7dm1JUl5enjwej5KSkqw2t956qxo1aqTc3Fy1b99eubm5atasmaKjo602TqdTQ4YM0a5du9SyZUvl5ub61KhoM3z48Iv2xe12y+12W9MlJSWSJI/HI4/HczW76aOili3YG7Cav4dAHgOgqjEOYRpbtevrtVwx9irjNR2omlcccMrLyzV8+HB17NhRt99+uySpsLBQoaGhioqK8mkbHR2twsJCq83Z4aZiecWyS7UpKSnRqVOnFB4efl5/MjMzNX78+PPmZ2dnKyIi4sp28hImtikPeM3KtGLFiqruAhBwjEOYYkrbqu7BlXG5XAGvefLkyYDUueKAk5aWpq+//lrr1q0LSEeu1pgxY5Senm5Nl5SUKC4uTsnJybLb7QHbjsfjkcvl0vNbguUuDwpY3cr2dQaX9mAOxiFMc3vG9XWPqS3Yq4ltytWlSxeFhIQEtHbFFZirdUUBZ+jQoVq2bJnWrl2rhg0bWvNjYmJUWlqqI0eO+JzFKSoqUkxMjNVm06ZNPvUqnrI6u825T14VFRXJbrdf8OyNJNlsNtlstvPmh4SEBPzgS5K7PEjusuvnjbUyjgFQ1RiHMMX19Do+W2V8xgaqnl9PUXm9Xg0dOlQffvihPv/8c8XHx/ssb926tUJCQpSTk2PN27NnjwoKCuRwOCRJDodDO3fuVHFxsdXG5XLJbrcrMTHRanN2jYo2FTUAAAAuxa8zOGlpaVq0aJE+/vhj1apVy7pnJjIyUuHh4YqMjFT//v2Vnp6u2rVry26365lnnpHD4VD79u0lScnJyUpMTNQTTzyhKVOmqLCwUGPHjlVaWpp1Bmbw4MF69dVXNWrUKD399NP6/PPPtWTJEi1fvjzAuw8AAEzk1xmc119/XUePHlWnTp3UoEED69/ixYutNtOnT9ef//xn9erVS/fcc49iYmL0wQcfWMurVaumZcuWqVq1anI4HHr88cfVt29fTZgwwWoTHx+v5cuXy+Vy6Y477tDUqVP11ltv8Yg4AAC4LH6dwfF6f/sxtrCwMM2ePVuzZ8++aJvGjRv/5tMEnTp10rZt2/zpHgAAgCR+iwoAABiIgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4
BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABjH74Czdu1a9ejRQ7GxsQoKCtJHH33ks9zr9WrcuHFq0KCBwsPDlZSUpL179/q0OXTokFJTU2W32xUVFaX+/fvr+PHjPm127Nihu+++W2FhYYqLi9OUKVP83zsAAPCH5HfAOXHihO644w7Nnj37gsunTJmiV155RXPmzNHGjRtVo0YNOZ1OnT592mqTmpqqXbt2yeVyadmyZVq7dq0GDRpkLS8pKVFycrIaN26svLw8/e1vf1NGRobefPPNK9hFAADwR1Pd3xW6deumbt26XXCZ1+vVjBkzNHbsWPXs2VOStGDBAkVHR+ujjz5S79699c0332jlypXavHmz2rRpI0maNWuWunfvrpdfflmxsbFauHChSktL9fbbbys0NFS33Xab8vPzNW3aNJ8gBAAAcCF+B5xL2bdvnwoLC5WUlGTNi4yMVLt27ZSbm6vevXsrNzdXUVFRVriRpKSkJAUHB2vjxo168MEHlZubq3vuuUehoaFWG6fTqZdeekmHDx/WDTfccN623W633G63NV1SUiJJ8ng88ng8AdvHilq2YG/Aav4eAnkMgKrGOIRpbNWur9dyxdirjNd0oGoGNOAUFhZKkqKjo33mR0dHW8sKCwtVv359305Ur67atWv7tImPjz+vRsWyCwWczMxMjR8//rz52dnZioiIuMI9uriJbcoDXrMyrVixoqq7AAQc4xCmmNK2qntwZVwuV8Brnjx5MiB1AhpwqtKYMWOUnp5uTZeUlCguLk7Jycmy2+0B247H45HL5dLzW4LlLg8KWN3K9nWGs6q7AAQM4xCmuT1jVVV3wS+2YK8mtilXly5dFBISEtDaFVdgrlZAA05MTIwkqaioSA0aNLDmFxUVqUWLFlab4uJin/XOnDmjQ4cOWevHxMSoqKjIp03FdEWbc9lsNtlstvPmh4SEBPzgS5K7PEjusuvnjbUyjgFQ1RiHMMX19Do+W2V8xgaqXkC/Byc+Pl4xMTHKycmx5pWUlGjjxo1yOBySJIfDoSNHjigvL89q8/nnn6u8vFzt2rWz2qxdu9bnOpzL5VLTpk0veHkKAADgbH4HnOPHjys/P1/5+fmSfr2xOD8/XwUFBQoKCtLw4cM1adIkffLJJ9q5c6f69u2r2NhYPfDAA5KkhIQEde3aVQMHDtSmTZv01VdfaejQoerdu7diY2MlSX369FFoaKj69++vXbt2afHixZo5c6bPJSgAAICL8fsS1ZYtW3TfffdZ0xWho1+/fsrKytKoUaN04sQJDRo0SEeOHNFdd92llStXKiwszFpn4cKFGjp0qO6//34FBwerV69eeuWVV6zlkZGRys7OVlpamlq3bq26detq3LhxPCIOAAAui98Bp1OnTvJ6L/44W1BQkCZMmKAJEyZctE3t2rW1aNGiS26nefPm+vLLL/3tHgAAAL9FBQAAzEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAAMA4BBwAAGIeAAwAAjEPAAQAAxiHgAAAA4xBwAACAcQg4AADAOAQcAABgHAIOAAAwDgEHAAAYh4ADAACMQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIxDwAEAAMYh4AAAAOMQcAAAgHEIOAAAwDgEHAAAYBwCDgAA
MA4BBwAAGIeAAwAAjHNNB5zZs2frpptuUlhYmNq1a6dNmzZVdZcAAMB14JoNOIsXL1Z6erpeeOEFbd26VXfccYecTqeKi4urumsAAOAad80GnGnTpmngwIF66qmnlJiYqDlz5igiIkJvv/12VXcNAABc46pXdQcupLS0VHl5eRozZow1Lzg4WElJScrNzb3gOm63W26325o+evSoJOnQoUPyeDwB65vH49HJkydV3ROssvKggNWtbAcPHqzqLgABwziEaaqfOVHVXfBL9XKvTp4s18GDBxUSEhLQ2seOHZMkeb3eq6pzTQacX375RWVlZYqOjvaZHx0drW+//faC62RmZmr8+PHnzY+Pj6+UPl5v6k6t6h4AYBzCJH0quf6xY8cUGRl5xetfkwHnSowZM0bp6enWdHl5uQ4dOqQ6deooKChwf+GVlJQoLi5OP//8s+x2e8DqArh8jEOgalXmGPR6vTp27JhiY2Ovqs41GXDq1q2ratWqqaioyGd+UVGRYmJiLriOzWaTzWbzmRcVFVVZXZTdbueNFahijEOgalXWGLyaMzcVrsmbjENDQ9W6dWvl5ORY88rLy5WTkyOHw1GFPQMAANeDa/IMjiSlp6erX79+atOmjdq2basZM2boxIkTeuqpp6q6awAA4Bp3zQacRx99VP/3f/+ncePGqbCwUC1atNDKlSvPu/H492az2fTCCy+cdzkMwO+HcQhUrethDAZ5r/Y5LAAAgGvMNXkPDgAAwNUg4AAAAOMQcAAAgHEIONeo1atXKygoSEeOHKnqrgDXjMsdFzfddJNmzJjxu/QJwKVlZGSoRYsWv/t2CTgB8tNPPykoKEj5+flV3RXAWB06dNCBAwesLwHLysq64Bd6bt68WYMGDfqdewcgKChIH330kc+8kSNH+nyv3e/lmn1M3FSlpaUKDQ2t6m4A16XQ0NCLfpv52erVq/c79AbA5ahZs6Zq1qz5u2/XiDM4nTp10rBhwzRq1CjVrl1bMTExysjIsJYfOXJEAwYMUL169WS329W5c2dt377dWv7DDz+oZ8+eio6OVs2aNXXnnXfqs88+89nGhVJpVFSUsrKyJP3/H/Vs2bKlgoKC1KlTJ0nSk08+qQceeECTJ09WbGysmjZtKkl655131KZNG9WqVUsxMTHq06ePiouLA3tggCrQqVMnDR06VEOHDlVkZKTq1q2r559/3vpl4MOHD6tv37664YYbFBERoW7dumnv3r3W+v/617/Uo0cP3XDDDapRo4Zuu+02rVixQpLvJarVq1frqaee0tGjRxUUFKSgoCBr3J99iapPnz569NFHffro8XhUt25dLViwQNKv35SemZmp+Ph4hYeH64477tA///nPSj5SQOBc7eegJE2aNEn169dXrVq1NGDAAP3lL3/xubS0efNmdenSRXXr1lVkZKTuvfdebd261Vp+0003SZIefPBBBQUFWdNnX6LKzs5WWFjYeZeZn332WXXu3NmaXrdune6++26Fh4crLi5Ow4YN04kT/v3iuhEBR5Lmz5+vGjVqaOPGjZoyZYomTJggl8slSXrkkUdUXFysTz/9VHl5eWrVqpXuv/9+HTp0SJJ0/Phxde/eXTk5Odq2bZu6du2qHj16qKCg4LK3v2nTJknSZ599pgMHDuiDDz6wluXk5GjPnj1yuVxatmyZpF/fYCdOnKjt27fro48+0k8//aQnn3wyQEcDqFrz589X9erVtWnTJs2cOVPTpk3TW2+9JenX0L9lyxZ98sknys3NldfrVffu3eXxeCRJaWlpcrvdWrt2rXbu3KmXXnrpgn/9dejQQTNmzJDdbteBAwd04MABjRw58rx2qampWrp0qY4fP27NW7VqlU6ePKkHH3xQkpSZmakFCxZozpw52rVrl0aMGKHHH39ca9asqYzDA1SKq/kcXLhwoSZPnqyXXnpJeXl5atSokV5//XWf+seOHVO/fv20bt06bdiwQTfffLO6d++uY8eOSfo1
AEnSvHnzdODAAWv6bPfff7+ioqL0/vvvW/PKysq0ePFipaamSvr1pEPXrl3Vq1cv7dixQ4sXL9a6des0dOhQ/w6I1wD33nuv96677vKZd+edd3pHjx7t/fLLL712u917+vRpn+V/+tOfvG+88cZFa952223eWbNmWdOSvB9++KFPm8jISO+8efO8Xq/Xu2/fPq8k77Zt23za9OvXzxsdHe11u92X3IfNmzd7JXmPHTvm9Xq93i+++MIryXv48OFLrgdca+69915vQkKCt7y83Jo3evRob0JCgve7777zSvJ+9dVX1rJffvnFGx4e7l2yZInX6/V6mzVr5s3IyLhg7XPHxbx587yRkZHntWvcuLF3+vTpXq/X6/V4PN66det6FyxYYC1/7LHHvI8++qjX6/V6T58+7Y2IiPCuX7/ep0b//v29jz32mN/7D1SFq/0cbNeunTctLc1neceOHb133HHHRbdZVlbmrVWrlnfp0qXWvAt9Vr7wwgs+dZ599llv586drelVq1Z5bTabNa779+/vHTRokE+NL7/80hscHOw9derURftzLmPO4DRv3txnukGDBiouLtb27dt1/Phx1alTx7oOWLNmTe3bt08//PCDpF/P4IwcOVIJCQmKiopSzZo19c033/h1BudSmjVrdt59N3l5eerRo4caNWqkWrVq6d5775WkgG0TqErt27dXUFCQNe1wOLR3717t3r1b1atXV7t27axlderUUdOmTfXNN99IkoYNG6ZJkyapY8eOeuGFF7Rjx46r6kv16tX1b//2b1q4cKEk6cSJE/r444+tvxa///57nTx5Ul26dPF5j1iwYIH1HgFcD67mc3DPnj1q27atz/rnThcVFWngwIG6+eabFRkZKbvdruPHj/v9uZWamqrVq1dr//79kn49e5SSkmI9MLB9+3ZlZWX59NXpdKq8vFz79u277O0Yc5NxSEiIz3RQUJDKy8t1/PhxNWjQQKtXrz5vnYqDOXLkSLlcLr388stq0qSJwsPD9fDDD6u0tNSnnvecX7WoOKX+W2rUqOEzfeLECTmdTjmdTi1cuFD16tVTQUGBnE6nzzaBP6IBAwbI6XRq+fLlys7OVmZmpqZOnapnnnnmimumpqbq3nvvVXFxsVwul8LDw9W1a1dJsi5dLV++XDfeeKPPetfy7+wA57qaz8HL0a9fPx08eFAzZ85U48aNZbPZ5HA4/P7cuvPOO/WnP/1J7777roYMGaIPP/zQup9V+nVM/vu//7uGDRt23rqNGjW67O0YE3AuplWrViosLFT16tWtG57O9dVXX+nJJ5+0rscfP35cP/30k0+bevXq6cCBA9b03r17dfLkSWu64gxNWVnZb/bp22+/1cGDB/Xiiy8qLi5OkrRlyxZ/dgu4pm3cuNFnuuJ6fWJios6cOaONGzeqQ4cOkqSDBw9qz549SkxMtNrHxcVp8ODBGjx4sMaMGaO///3vFww4oaGhlzXmOnTooLi4OC1evFiffvqpHnnkEevDIDExUTabTQUFBdaZVMAkl/M52LRpU23evFl9+/a15p17D81XX32l1157Td27d5ck/fzzz/rll1982oSEhFzWmExNTdXChQvVsGFDBQcHKyUlxae/u3fvVpMmTS53Fy/ImEtUF5OUlCSHw6EHHnhA2dnZ+umnn7R+/Xr913/9lxUqbr75Zn3wwQfKz8/X9u3b1adPH5WXl/vU6dy5s1599VVt27ZNW7Zs0eDBg33Scv369RUeHq6VK1eqqKhIR48evWifGjVqpNDQUM2aNUs//vijPvnkE02cOLFyDgBQBQoKCpSenq49e/boH//4h2bNmqVnn31WN998s3r27KmBAwdq3bp12r59ux5//HHdeOON6tmzpyRp+PDhWrVqlfbt26etW7fqiy++UEJCwgW3c9NNN+n48ePKycnRL7/84vNHx7n69OmjOXPmyOVyWZenJKlWrVoaOXKkRowYofnz5+uHH37Q1q1bNWvW
LM2fPz+wBwaoApfzOfjMM89o7ty5mj9/vvbu3atJkyZpx44dPpeab775Zr3zzjv65ptvtHHjRqWmpio8PNxnWzfddJNycnJUWFiow4cPX7RPqamp2rp1qyZPnqyHH37Y52zp6NGjtX79eg0dOlT5+fnau3evPv74Y79vMjY+4AQFBWnFihW655579NRTT+mWW25R79699a9//UvR0dGSpGnTpumGG25Qhw4d1KNHDzmdTrVq1cqnztSpUxUXF6e7775bffr00ciRIxUREWEtr169ul555RW98cYbio2Ntd6sL6RevXrKysrSe++9p8TERL344ot6+eWXK+cAAFWgb9++OnXqlNq2bau0tDQ9++yz1hfvzZs3T61bt9af//xnORwOeb1erVixwvqDoaysTGlpaUpISFDXrl11yy236LXXXrvgdjp06KDBgwfr0UcfVb169TRlypSL9ik1NVW7d+/WjTfeqI4dO/osmzhxop5//nllZmZa212+fLn19Q/A9exyPgdTU1M1ZswYjRw5Uq1atdK+ffv05JNPKiwszKozd+5cHT58WK1atdITTzyhYcOGqX79+j7bmjp1qlwul+Li4tSyZcuL9qlJkyZq27atduzY4fMHh/TrvURr1qzRd999p7vvvlstW7bUuHHjFBsb699+e8+9sQQArkKnTp3UokULfioBuM516dJFMTExeuedd6q6K1fE+HtwAADApZ08eVJz5syR0+lUtWrV9I9//EOfffaZ9T061yMCDgAAf3AVl7EmT56s06dPq2nTpnr//feVlJRU1V27YlyiAgAAxjH+JmMAAPDHQ8ABAADGIeAAAADjEHAAAIBxCDgAAMA4BBwAAGAcAg4AADAOAQcAABiHgAMAAIzz/wDxzgecrN25DQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "reviews_test_sample.sentiment.hist()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.656494Z",
     "iopub.status.busy": "2023-11-07T17:51:03.655417Z",
     "iopub.status.idle": "2023-11-07T17:51:03.661386Z",
     "shell.execute_reply": "2023-11-07T17:51:03.660352Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.656458Z"
    }
   },
   "outputs": [],
   "source": [
    "reviews_sentiment_map = {\n",
    "    'negative': 0,\n",
    "    'neautral': 1,\n",
    "    'positive': 2\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.663630Z",
     "iopub.status.busy": "2023-11-07T17:51:03.662617Z",
     "iopub.status.idle": "2023-11-07T17:51:03.692662Z",
     "shell.execute_reply": "2023-11-07T17:51:03.691790Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.663594Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>review</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>45140</th>\n",
       "      <td>Ехала долго посылка, больше полутора месяца. Д...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82647</th>\n",
       "      <td>Очень красивый и приятный на ощупь))</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39454</th>\n",
       "      <td>нитки торчат, прошито очень неаккуратно</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71219</th>\n",
       "      <td>Доставка быстрая. По размеру подошло 46 р-р. Т...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45132</th>\n",
       "      <td>Пахнет</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72801</th>\n",
       "      <td>Отлично всё - пошив, качество, фасон. Брала сп...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67107</th>\n",
       "      <td>Пришли за три недели, на 39 идеально!!! Понрав...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1487</th>\n",
       "      <td>Плохое качество и велик размер S. Хотя у меня ...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9131</th>\n",
       "      <td>Прошло 4 месяца с продом.продлевали защиту тов...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3752</th>\n",
       "      <td>Товар так и не пришел! Пришел подарок от продо...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  review  sentiment\n",
       "45140  Ехала долго посылка, больше полутора месяца. Д...          1\n",
       "82647              Очень красивый и приятный на ощупь))           2\n",
       "39454            нитки торчат, прошито очень неаккуратно          1\n",
       "71219  Доставка быстрая. По размеру подошло 46 р-р. Т...          2\n",
       "45132                                            Пахнет           1\n",
       "72801  Отлично всё - пошив, качество, фасон. Брала сп...          2\n",
       "67107  Пришли за три недели, на 39 идеально!!! Понрав...          2\n",
       "1487   Плохое качество и велик размер S. Хотя у меня ...          0\n",
       "9131   Прошло 4 месяца с продом.продлевали защиту тов...          0\n",
       "3752   Товар так и не пришел! Пришел подарок от продо...          0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviews_test_sample.sentiment = reviews_test_sample.sentiment\\\n",
    "    .apply(lambda x: reviews_sentiment_map[x])\n",
    "reviews_test_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.694326Z",
     "iopub.status.busy": "2023-11-07T17:51:03.693647Z",
     "iopub.status.idle": "2023-11-07T17:51:03.701457Z",
     "shell.execute_reply": "2023-11-07T17:51:03.700563Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.694292Z"
    }
   },
   "outputs": [],
   "source": [
    "class SentimentAnalysisDataset(Dataset):\n",
    "    def __init__(self, X, y, tokenizer=None, num_labels=3, max_length=2048):\n",
    "        self.X = tokenizer(X.tolist(), \n",
    "                           truncation=True, \n",
    "                           padding=True,\n",
    "                           return_tensors=\"pt\",\n",
    "                           max_length=max_length).to(device)\n",
    "        self.y = torch.tensor(y.to_numpy(), dtype=torch.int64).to(device)\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.y)\n",
    "    \n",
    "    def __getitem__(self, idx):\n",
    "        return self.X.input_ids[idx], self.X.attention_mask[idx], self.y[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.702795Z",
     "iopub.status.busy": "2023-11-07T17:51:03.702536Z",
     "iopub.status.idle": "2023-11-07T17:51:03.711987Z",
     "shell.execute_reply": "2023-11-07T17:51:03.711265Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.702771Z"
    }
   },
   "outputs": [],
   "source": [
    "X = reviews_test_sample.review\n",
    "y = reviews_test_sample.sentiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.713739Z",
     "iopub.status.busy": "2023-11-07T17:51:03.713087Z",
     "iopub.status.idle": "2023-11-07T17:51:03.724885Z",
     "shell.execute_reply": "2023-11-07T17:51:03.724118Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.713701Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<pad>\n",
      "[PAD]\n"
     ]
    }
   ],
   "source": [
    "tokenizer.pad_token_id = 0\n",
    "rubert_tokenizer.pad_token_id = 0\n",
    "print(tokenizer.pad_token)\n",
    "print(rubert_tokenizer.pad_token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:03.726177Z",
     "iopub.status.busy": "2023-11-07T17:51:03.725888Z",
     "iopub.status.idle": "2023-11-07T17:51:40.360939Z",
     "shell.execute_reply": "2023-11-07T17:51:40.359944Z",
     "shell.execute_reply.started": "2023-11-07T17:51:03.726153Z"
    }
   },
   "outputs": [],
   "source": [
    "reviews_test_dataset = SentimentAnalysisDataset(X, y, tokenizer)\n",
    "rubert_reviews_test_dataset = SentimentAnalysisDataset(X, y, rubert_tokenizer, \n",
    "                                                       max_length=512)\n",
    "\n",
    "free_memory()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:40.362352Z",
     "iopub.status.busy": "2023-11-07T17:51:40.362042Z",
     "iopub.status.idle": "2023-11-07T17:51:40.367009Z",
     "shell.execute_reply": "2023-11-07T17:51:40.365999Z",
     "shell.execute_reply.started": "2023-11-07T17:51:40.362326Z"
    }
   },
   "outputs": [],
   "source": [
    "reviews_test_loader = DataLoader(reviews_test_dataset, batch_size=32)\n",
    "rubert_reviews_test_loader = DataLoader(rubert_reviews_test_dataset, batch_size=32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:40.368792Z",
     "iopub.status.busy": "2023-11-07T17:51:40.368444Z",
     "iopub.status.idle": "2023-11-07T17:51:40.380761Z",
     "shell.execute_reply": "2023-11-07T17:51:40.379982Z",
     "shell.execute_reply.started": "2023-11-07T17:51:40.368759Z"
    }
   },
   "outputs": [],
   "source": [
    "def evaluate_model(model, data_loader):\n",
    "    ground_truth = []\n",
    "    predictions = []\n",
    "\n",
    "    model.eval()\n",
    "    for input_ids, attention_mask, labels in data_loader:\n",
    "        input_ids = input_ids.to(device)\n",
    "        attention_mask = attention_mask.to(device)\n",
    "        labels = labels.to(device)\n",
    "        outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n",
    "        logits = torch.mean(outputs.logits, dim=1)\n",
    "        _, indices = torch.max(logits, 1)\n",
    "        predictions.extend(indices.tolist())\n",
    "        ground_truth.extend(labels.tolist())\n",
    "        del input_ids\n",
    "        del attention_mask\n",
    "        del labels\n",
    "        del outputs\n",
    "        del logits\n",
    "        del indices\n",
    "        free_memory()\n",
    "    accuracy = accuracy_score(ground_truth, predictions)\n",
    "    f1 = f1_score(ground_truth, predictions, average='macro')\n",
    "    del predictions\n",
    "    del ground_truth\n",
    "    free_memory()\n",
    "    return accuracy, f1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:40.381942Z",
     "iopub.status.busy": "2023-11-07T17:51:40.381707Z",
     "iopub.status.idle": "2023-11-07T17:51:40.400725Z",
     "shell.execute_reply": "2023-11-07T17:51:40.399894Z",
     "shell.execute_reply.started": "2023-11-07T17:51:40.381921Z"
    }
   },
   "outputs": [],
   "source": [
    "def evaluate_bert_model(model, data_loader):\n",
    "    ground_truth = []\n",
    "    predictions = []\n",
    "\n",
    "    model.eval()\n",
    "    for input_ids, attention_mask, labels in data_loader:\n",
    "        input_ids = input_ids.to(device)\n",
    "        attention_mask = attention_mask.to(device)\n",
    "        labels = labels.to(device)\n",
    "        outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n",
    "        _, indices = torch.max(outputs.logits, 1)\n",
    "        predictions.extend(indices.tolist())\n",
    "        ground_truth.extend(labels.tolist())\n",
    "        del input_ids\n",
    "        del attention_mask\n",
    "        del labels\n",
    "        del outputs\n",
    "        del indices\n",
    "        free_memory()\n",
    "    accuracy = accuracy_score(ground_truth, predictions)\n",
    "    f1 = f1_score(ground_truth, predictions, average='macro')\n",
    "    del predictions\n",
    "    del ground_truth\n",
    "    free_memory()\n",
    "    return accuracy, f1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T17:51:40.402216Z",
     "iopub.status.busy": "2023-11-07T17:51:40.401829Z",
     "iopub.status.idle": "2023-11-07T18:00:23.729261Z",
     "shell.execute_reply": "2023-11-07T18:00:23.728357Z",
     "shell.execute_reply.started": "2023-11-07T17:51:40.402181Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviews test accuracy from large text model is 0.4852840128263857\n",
      "Reviews test f1 score from large text model is 0.48834310344720455\n",
      "CPU times: user 7min 42s, sys: 59.5 s, total: 8min 41s\n",
      "Wall time: 8min 43s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(lt_model, reviews_test_loader)\n",
    "print(f\"Reviews test accuracy from large text model is {accuracy}\")\n",
    "print(f\"Reviews test f1 score from large text model is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:00:23.730588Z",
     "iopub.status.busy": "2023-11-07T18:00:23.730311Z",
     "iopub.status.idle": "2023-11-07T18:09:05.063752Z",
     "shell.execute_reply": "2023-11-07T18:09:05.062734Z",
     "shell.execute_reply.started": "2023-11-07T18:00:23.730563Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviews test accuracy from small text model is 0.7238318827301878\n",
      "Reviews test f1 score from small text model is 0.7217606786663741\n",
      "CPU times: user 7min 42s, sys: 59.5 s, total: 8min 41s\n",
      "Wall time: 8min 41s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(st_model, reviews_test_loader)\n",
    "print(f\"Reviews test accuracy from small text model is {accuracy}\")\n",
    "print(f\"Reviews test f1 score from small text model is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:09:05.065307Z",
     "iopub.status.busy": "2023-11-07T18:09:05.065000Z",
     "iopub.status.idle": "2023-11-07T18:30:26.004613Z",
     "shell.execute_reply": "2023-11-07T18:30:26.003644Z",
     "shell.execute_reply.started": "2023-11-07T18:09:05.065282Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviews test accuracy from small text medium sized model is 0.7186211635364178\n",
      "Reviews test f1 score from small text medium sized model is 0.7154217127279076\n",
      "CPU times: user 19min 38s, sys: 1min 43s, total: 21min 21s\n",
      "Wall time: 21min 20s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(st_medium_model, reviews_test_loader)\n",
    "print(f\"Reviews test accuracy from small text medium sized model is {accuracy}\")\n",
    "print(f\"Reviews test f1 score from small text medium sized model is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:30:26.006092Z",
     "iopub.status.busy": "2023-11-07T18:30:26.005797Z",
     "iopub.status.idle": "2023-11-07T18:35:57.753249Z",
     "shell.execute_reply": "2023-11-07T18:35:57.752277Z",
     "shell.execute_reply.started": "2023-11-07T18:30:26.006055Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviews test accuracy from small text RuBert is 0.6501946862116353\n",
      "Reviews test f1 score from small text RuBert is 0.6474060214332357\n",
      "CPU times: user 5min 20s, sys: 11.4 s, total: 5min 31s\n",
      "Wall time: 5min 31s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_bert_model(st_rubert, rubert_reviews_test_loader)\n",
    "print(f\"Reviews test accuracy from small text RuBert is {accuracy}\")\n",
    "print(f\"Reviews test f1 score from small text RuBert is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:35:57.755004Z",
     "iopub.status.busy": "2023-11-07T18:35:57.754607Z",
     "iopub.status.idle": "2023-11-07T18:41:28.009989Z",
     "shell.execute_reply": "2023-11-07T18:41:28.009087Z",
     "shell.execute_reply.started": "2023-11-07T18:35:57.754973Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviews test accuracy from large text RuBert is 0.5128263857077416\n",
      "Reviews test f1 score from large text RuBert is 0.5064336058420454\n",
      "CPU times: user 5min 19s, sys: 11.4 s, total: 5min 30s\n",
      "Wall time: 5min 30s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_bert_model(lt_rubert, rubert_reviews_test_loader)\n",
    "print(f\"Reviews test accuracy from large text RuBert is {accuracy}\")\n",
    "print(f\"Reviews test f1 score from large text RuBert is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:28.011591Z",
     "iopub.status.busy": "2023-11-07T18:41:28.011303Z",
     "iopub.status.idle": "2023-11-07T18:41:29.460796Z",
     "shell.execute_reply": "2023-11-07T18:41:29.459849Z",
     "shell.execute_reply.started": "2023-11-07T18:41:28.011565Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>id</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Досудебное расследование по факту покупки ЕНПФ...</td>\n",
       "      <td>1945</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Медики рассказали о состоянии пострадавшего му...</td>\n",
       "      <td>1957</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Прошел почти год, как железнодорожным оператор...</td>\n",
       "      <td>1969</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>По итогам 12 месяцев 2016 года на территории р...</td>\n",
       "      <td>1973</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Астана. 21 ноября. Kazakhstan Today - Агентств...</td>\n",
       "      <td>1975</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Об аресте бывшего министра национальной эконом...</td>\n",
       "      <td>1980</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Двое налетчиков совершили нападение на охранни...</td>\n",
       "      <td>1982</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\"Самрук-Энерго\" в I квартале почти вдвое снизи...</td>\n",
       "      <td>1983</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>В 2016 году почти половина коррупционных прест...</td>\n",
       "      <td>1985</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Google+\\nЧт 19 янв 2017 12:35:02\\nОдин из фигу...</td>\n",
       "      <td>1988</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                text    id sentiment\n",
       "0  Досудебное расследование по факту покупки ЕНПФ...  1945  negative\n",
       "1  Медики рассказали о состоянии пострадавшего му...  1957  negative\n",
       "2  Прошел почти год, как железнодорожным оператор...  1969  negative\n",
       "3  По итогам 12 месяцев 2016 года на территории р...  1973  negative\n",
       "4  Астана. 21 ноября. Kazakhstan Today - Агентств...  1975  negative\n",
       "5  Об аресте бывшего министра национальной эконом...  1980  negative\n",
       "6  Двое налетчиков совершили нападение на охранни...  1982  negative\n",
       "7  \"Самрук-Энерго\" в I квартале почти вдвое снизи...  1983  negative\n",
       "8  В 2016 году почти половина коррупционных прест...  1985  negative\n",
       "9  Google+\\nЧт 19 янв 2017 12:35:02\\nОдин из фигу...  1988  negative"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "articles = pd.read_json('data/articles/train.json')\n",
    "articles.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:29.462209Z",
     "iopub.status.busy": "2023-11-07T18:41:29.461877Z",
     "iopub.status.idle": "2023-11-07T18:41:29.475007Z",
     "shell.execute_reply": "2023-11-07T18:41:29.474109Z",
     "shell.execute_reply.started": "2023-11-07T18:41:29.462167Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>id</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5176</th>\n",
       "      <td>В немецком городе Гамбург состоялся экономичес...</td>\n",
       "      <td>7230</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5418</th>\n",
       "      <td>Глава правительства Бакытжан Сагинтаев провел ...</td>\n",
       "      <td>7472</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>856</th>\n",
       "      <td>В среду, 25 января, первый заместитель п...</td>\n",
       "      <td>2910</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8200</th>\n",
       "      <td>Kapital.kz 19 января 2017, 17:11 \\n\\nkapital.k...</td>\n",
       "      <td>10254</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1793</th>\n",
       "      <td>Высококачественные подделки тенге в Казахстане...</td>\n",
       "      <td>3847</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7678</th>\n",
       "      <td>Все отрасли ТЭК. Россия и Мир 20:41, 5.1.17 \\n...</td>\n",
       "      <td>9732</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4840</th>\n",
       "      <td>\\nКАЗАНЬ, 7 октября. /Корр. ТАСС Алексей Угаро...</td>\n",
       "      <td>6894</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5576</th>\n",
       "      <td>О реализации инвестпроектов в агропромышленном...</td>\n",
       "      <td>7630</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7326</th>\n",
       "      <td>Ранее торговый центр был выставлен на торги че...</td>\n",
       "      <td>9380</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5351</th>\n",
       "      <td>Алматы. 26 декабря. Центр информации. Руководс...</td>\n",
       "      <td>7405</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   text     id sentiment\n",
       "5176  В немецком городе Гамбург состоялся экономичес...   7230   neutral\n",
       "5418  Глава правительства Бакытжан Сагинтаев провел ...   7472   neutral\n",
       "856         В среду, 25 января, первый заместитель п...   2910  positive\n",
       "8200  Kapital.kz 19 января 2017, 17:11 \\n\\nkapital.k...  10254  positive\n",
       "1793  Высококачественные подделки тенге в Казахстане...   3847   neutral\n",
       "7678  Все отрасли ТЭК. Россия и Мир 20:41, 5.1.17 \\n...   9732   neutral\n",
       "4840  \\nКАЗАНЬ, 7 октября. /Корр. ТАСС Алексей Угаро...   6894  positive\n",
       "5576  О реализации инвестпроектов в агропромышленном...   7630   neutral\n",
       "7326  Ранее торговый центр был выставлен на торги че...   9380   neutral\n",
       "5351  Алматы. 26 декабря. Центр информации. Руководс...   7405  positive"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "articles_test_sample = articles.sample(2056, random_state=42)\n",
    "articles_test_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:29.480352Z",
     "iopub.status.busy": "2023-11-07T18:41:29.480057Z",
     "iopub.status.idle": "2023-11-07T18:41:29.684231Z",
     "shell.execute_reply": "2023-11-07T18:41:29.683277Z",
     "shell.execute_reply.started": "2023-11-07T18:41:29.480329Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjgAAAGdCAYAAAAfTAk2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAApQElEQVR4nO3deXCUZYLH8V+H3EAnhCMhGiClqARRTqFBhYWQIEgBwzhmjBpdhJFJ5Eghml3OABNhFTkGzTjrcMyCw8y6oiJC2jiASAwQhkNgkXVRrMUkixBCyNA0Se8fVt61OWY43qbh8fupssr36Od9uitv+ku/3WmHz+fzCQAAwCAhwZ4AAACA3QgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYJDfYEAqW+vl5Hjx5V06ZN5XA4gj0dAABwGXw+n06dOqXExESFhFz96zDGBs7Ro0eVlJQU7GkAAICr8M033+jWW2+96tsbGzhNmzaV9P0D5HQ6bRvX6/WqqKhIaWlpCgsLs21cAJeP8xAIrkCeg9XV1UpKSrKex6+WsYHTcFnK6XTaHjjR0dFyOp38YgWChPMQCK7rcQ5e69tLeJMxAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMExrsCdys7p6xQZ66a/sq9+vpq5eGBHsKAABcN7yCAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMc8WBs3nzZg0dOlSJiYlyOBxas2aN33afz6dp06apdevWioqKUmpqqg4dOuS3z/Hjx5WZmSmn06nY2FiNGjVKNTU1fvvs2bNHDzzwgCIjI5WUlKR58+Zd+b0DAAA/SlccOKdPn9a9996rJUuWXHT7vHnztGjRIhUWFqq0tFSNGzdWenq6zpw5Y+2TmZmpffv2ye12a+3atdq8ebPGjBljba+urlZaWpratm2rsrIy/cu//ItmzJihN9544yruIgAA+LG54r+D89BDD+mhhx666Dafz6cFCxZoypQpGjZsmCRpxYoVio+P15o1a5SRkaEDBw5o/fr12r59u7p37y5JWrx4sQYPHqyXX35ZiYmJWrlypc6ePavf/e53Cg8PV8eOHbVr1y7Nnz/fL4QAAAAuxtY/9Hf48GGVl5crNTXVWhcTE6OePXuqpKREGRkZKikpUWxsrBU3kpSamqqQkBCVlpZqxIgRKikp0YMPPqjw8HBrn/T0dM2dO1cnTpxQs2bNLji2x+ORx+OxlqurqyVJXq9XXq/XtvvYMFZEiM+2Ma8HOx8DINgafp75uQaCI5DnoF1j2ho45eXlkqT4+Hi/9fHx8da28vJytWrVyn8SoaGKi4vz2yc5OfmCMRq2XSxwCgoKNHPmzAvWFxUVKTo6+irv0aXN6l5v+5iBtG7dumBPAbCd2+0O9hSAH7VAnIO1tbW2jGPMVzXk5eUpNzfXWq6urlZSUpLS0tLkdDptO47X65Xb7dbUHSHy1N88X9Xw+Yz0YE8BsE3DeThw4ECFhYUFezrAj04gz8GGKzDXytbASUhIkCRVVFSodevW1vqKigp17tzZ2qeystLvdufOndPx48et2yckJKiiosJvn4blhn3OFxERoYiIiAvWh4WFBeQXoKfecVN9FxVPAjBRoM5vAJcnEOegXePZ+ndwkpOTlZCQoOLiYmtddXW1SktL5XK5JEkul0tVVVUqKyuz9vn4449VX1+vnj17Wvts3rzZ7zqc2+3WnXfeedHLUwAAAD90xYFTU1OjXbt2adeuXZK+f2Pxrl27dOTIETkcDk2YMEGzZ8/We++9p7179+rJJ59UYmKihg8fLknq0KGDBg0apNGjR2vbtm369NNPlZOTo4yMDCUmJkqSHnvsMYWHh2vUqFHat2+fVq9erYULF/pdggIAALiU
K75EtWPHDv3DP/yDtdwQHVlZWVq2bJkmT56s06dPa8yYMaqqqtL999+v9evXKzIy0rrNypUrlZOTowEDBigkJEQjR47UokWLrO0xMTEqKipSdna2unXrphYtWmjatGl8RBwAAFyWKw6cfv36yee79EekHQ6H8vPzlZ+ff8l94uLitGrVqr95nHvuuUeffPLJlU4PAACA76ICAADmIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxrE9cOrq6jR16lQlJycrKipKt912m2bNmiWfz2ft4/P5NG3aNLVu3VpRUVFKTU3VoUOH/MY5fvy4MjMz5XQ6FRsbq1GjRqmmpsbu6QIAAAPZHjhz587V66+/rl//+tc6cOCA5s6dq3nz5mnx4sXWPvPmzdOiRYtUWFio0tJSNW7cWOnp6Tpz5oy1T2Zmpvbt2ye32621a9dq8+bNGjNmjN3TBQAABgq1e8CtW7dq2LBhGjJkiCSpXbt2euutt7Rt2zZJ3796s2DBAk2ZMkXDhg2TJK1YsULx8fFas2aNMjIydODAAa1fv17bt29X9+7dJUmLFy/W4MGD9fLLLysxMdHuaQMAAIPYHji9e/fWG2+8oS+++EJ33HGHdu/erS1btmj+/PmSpMOHD6u8vFypqanWbWJiYtSzZ0+VlJQoIyNDJSUlio2NteJGklJTUxUSEqLS0lKNGDHiguN6PB55PB5rubq6WpLk9Xrl9Xptu38NY0WE+P7OnjcWOx8DINgafp75uQaCI5DnoF1j2h44L774oqqrq3XXXXepUaNGqqur05w5c5SZmSlJKi8vlyTFx8f73S4+Pt7aVl5erlatWvlPNDRUcXFx1j7nKygo0MyZMy9YX1RUpOjo6Gu+X+eb1b3e9jEDad26dcGeAmA7t9sd7CkAP2qBOAdra2ttGcf2wPnjH/+olStXatWqVerYsaN27dqlCRMmKDExUVlZWXYfzpKXl6fc3Fxrubq6WklJSUpLS5PT6bTtOF6vV263W1N3hMhT77Bt3ED7fEZ6sKcA2KbhPBw4cKDCwsKCPR3gRyeQ52DDFZhrZXvgPP/883rxxReVkZEhSerUqZO+/vprFRQUKCsrSwkJCZKkiooKtW7d2rpdRUWFOnfuLElKSEhQZWWl37jnzp3T8ePHrdufLyIiQhEREResDwsLC8gvQE+9Q566mydweBKAiQJ1fgO4PIE4B+0az/ZPUdXW1iokxH/YRo0aqb7++0s6ycnJSkhIUHFxsbW9urpapaWlcrlckiSXy6WqqiqVlZVZ+3z88ceqr69Xz5497Z4yAAAwjO2v4AwdOlRz5sxRmzZt1LFjR/3lL3/R/Pnz9Y//+I+SJIfDoQkTJmj27Nlq3769kpOTNXXqVCUmJmr48OGSpA4dOmjQoEEaPXq0CgsL5fV6lZOTo4yMDD5BBQAA/i7bA2fx4sWaOnWqfvnLX6qyslKJiYn6xS9+oWnTpln7TJ48WadPn9aYMWNUVVWl+++/X+vXr1dkZKS1z8qVK5WTk6MBAwYoJCREI0eO1KJFi+yeLgAAMJDtgdO0aVMtWLBACxYsuOQ+DodD+fn5ys/Pv+Q+cXFxWrVqld3TAwAA
PwJ8FxUAADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4ocGeAABcrbtnbJCnzhHsaVy2r14aEuwpAD8avIIDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjBOQwPmf//kfPf7442revLmioqLUqVMn7dixw9ru8/k0bdo0tW7dWlFRUUpNTdWhQ4f8xjh+/LgyMzPldDoVGxurUaNGqaamJhDTBQAAhrE9cE6cOKE+ffooLCxMH374ofbv369XXnlFzZo1s/aZN2+eFi1apMLCQpWWlqpx48ZKT0/XmTNnrH0yMzO1b98+ud1urV27Vps3b9aYMWPsni4AADBQqN0Dzp07V0lJSVq6dKm1Ljk52fp/n8+nBQsWaMqUKRo2bJgkacWKFYqPj9eaNWuUkZGhAwcOaP369dq+fbu6d+8uSVq8eLEGDx6sl19+WYmJiXZPGwAAGMT2wHnvvfeUnp6uRx55RJs2bdItt9yiX/7ylxo9erQk6fDhwyovL1dqaqp1m5iYGPXs2VMlJSXKyMhQSUmJYmNjrbiRpNTUVIWEhKi0tFQjRoy44Lgej0cej8darq6uliR5vV55vV7b7l/DWBEhPtvGvB7sfAyAYOM8BIKr4Wc5ED/Tdo1pe+D893//t15//XXl5ubqn/7pn7R9+3aNGzdO4eHhysrKUnl5uSQpPj7e73bx8fHWtvLycrVq1cp/oqGhiouLs/Y5X0FBgWbOnHnB+qKiIkVHR9tx1/zM6l5v+5iBtG7dumBPAbAd5yEQXG632/Yxa2trbRnH9sCpr69X9+7d9atf/UqS1KVLF33++ecqLCxUVlaW3Yez5OXlKTc311qurq5WUlKS0tLS5HQ6bTuO1+uV2+3W1B0h8tQ7bBs30D6fkR7sKQC24TwEgqvhHBw4cKDCwsJsHbvhCsy1sj1wWrdurZSUFL91HTp00Ntvvy1JSkhIkCRVVFSodevW1j4VFRXq3LmztU9lZaXfGOfOndPx48et258vIiJCERERF6wPCwuz/cGXJE+9Q566m+cXayAeAyDYOA+B4ArEc6xd49n+Kao+ffro4MGDfuu++OILtW3bVtL3bzhOSEhQcXGxtb26ulqlpaVyuVySJJfLpaqqKpWVlVn7fPzxx6qvr1fPnj3tnjIAADCM7a/gTJw4Ub1799avfvUr/exnP9O2bdv0xhtv6I033pAkORwOTZgwQbNnz1b79u2VnJysqVOnKjExUcOHD5f0/Ss+gwYN0ujRo1VYWCiv16ucnBxlZGTwCSoAAPB32R44PXr00DvvvKO8vDzl5+crOTlZCxYsUGZmprXP5MmTdfr0aY0ZM0ZVVVW6//77tX79ekVGRlr7rFy5Ujk5ORowYIBCQkI0cuRILVq0yO7pAgAAA9keOJL08MMP6+GHH77kdofDofz8fOXn519yn7i4OK1atSoQ0wMAAIbju6gAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBx
CBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQIeOC+99JIcDocmTJhgrTtz5oyys7PVvHlzNWnSRCNHjlRFRYXf7Y4cOaIhQ4YoOjparVq10vPPP69z584FeroAAMAAAQ2c7du36ze/+Y3uuecev/UTJ07U+++/rz/96U/atGmTjh49qp/85CfW9rq6Og0ZMkRnz57V1q1btXz5ci1btkzTpk0L5HQBAIAhAhY4NTU1yszM1G9/+1s1a9bMWn/y5Em9+eabmj9/vvr3769u3bpp6dKl2rp1qz777DNJUlFRkfbv369/+7d/U+fOnfXQQw9p1qxZWrJkic6ePRuoKQMAAEOEBmrg7OxsDRkyRKmpqZo9e7a1vqysTF6vV6mpqda6u+66S23atFFJSYl69eqlkpISderUSfHx8dY+6enpGjt2rPbt26cuXbpccDyPxyOPx2MtV1dXS5K8Xq+8Xq9t96thrIgQn21jXg92PgZAsHEeAsHV8LMciJ9pu8YMSOD84Q9/0M6dO7V9+/YLtpWXlys8PFyxsbF+6+Pj41VeXm7t88O4adjesO1iCgoKNHPmzAvWFxUVKTo6+mruxt80q3u97WMG0rp164I9BcB2nIdAcLndbtvHrK2ttWUc2wPnm2++0fjx4+V2uxUZGWn38JeUl5en3Nxca7m6ulpJSUlKS0uT0+m07Ther1dut1tTd4TIU++wbdxA+3xGerCnANiG8xAIroZzcODAgQoLC7N17IYrMNfK9sApKytTZWWlunbtaq2rq6vT5s2b9etf/1obNmzQ2bNnVVVV5fcqTkVFhRISEiRJCQkJ2rZtm9+4DZ+yatjnfBEREYqIiLhgfVhYmO0PviR56h3y1N08v1gD8RgAwcZ5CARXIJ5j7RrP9jcZDxgwQHv37tWuXbus/7p3767MzEzr/8PCwlRcXGzd5uDBgzpy5IhcLpckyeVyae/evaqsrLT2cbvdcjqdSklJsXvKAADAMLa/gtO0aVPdfffdfusaN26s5s2bW+tHjRql3NxcxcXFyel06rnnnpPL5VKvXr0kSWlpaUpJSdETTzyhefPmqby8XFOmTFF2dvZFX6UBAAD4oYB9iupvefXVVxUSEqKRI0fK4/EoPT1dr732mrW9UaNGWrt2rcaOHSuXy6XGjRsrKytL+fn5wZguAAC4yVyXwNm4caPfcmRkpJYsWaIlS5Zc8jZt27blEwcAAOCq8F1UAADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDhB+aoGAADw/9q9+EGwp3BFIhr5NO++YM/ib+MVHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYBwCBwAAGIfAAQAAxiFwAACAcQgcAABgHAIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHFsD5yCggL16NFDTZs2VatWrTR8+HAdPHjQb58zZ84oOztbzZs3V5MmTTRy5EhVVFT47XPkyBENGTJE0dHRatWqlZ5/
/nmdO3fO7ukCAAAD2R44mzZtUnZ2tj777DO53W55vV6lpaXp9OnT1j4TJ07U+++/rz/96U/atGmTjh49qp/85CfW9rq6Og0ZMkRnz57V1q1btXz5ci1btkzTpk2ze7oAAMBAoXYPuH79er/lZcuWqVWrViorK9ODDz6okydP6s0339SqVavUv39/SdLSpUvVoUMHffbZZ+rVq5eKioq0f/9+ffTRR4qPj1fnzp01a9YsvfDCC5oxY4bCw8PtnjYAADCI7YFzvpMnT0qS4uLiJEllZWXyer1KTU219rnrrrvUpk0blZSUqFevXiopKVGnTp0UHx9v7ZOenq6xY8dq37596tKlywXH8Xg88ng81nJ1dbUkyev1yuv12nZ/GsaKCPHZNub1YOdjAAQb5yFME9Ho5vpZbjj3AvEzbdeYAQ2c+vp6TZgwQX369NHdd98tSSovL1d4eLhiY2P99o2Pj1d5ebm1zw/jpmF7w7aLKSgo0MyZMy9YX1RUpOjo6Gu9KxeY1b3e9jEDad26dcGeAmA7zkOYYt59wZ7B1XG73baPWVtba8s4AQ2c7Oxsff7559qyZUsgDyNJysvLU25urrVcXV2tpKQkpaWlyel02nYcr9crt9utqTtC5Kl32DZuoH0+Iz3YUwBsw3kI09w9Y0Owp3BFIkJ8mtW9XgMHDlRYWJitYzdcgblWAQucnJwcrV27Vps3b9att95qrU9ISNDZs2dVVVXl9ypORUWFEhISrH22bdvmN17Dp6wa9jlfRESEIiIiLlgfFhZm+4MvSZ56hzx1N88v1kA8BkCwcR7CFDfTz/EPBeI51q7xbP8Ulc/nU05Ojt555x19/PHHSk5O9tverVs3hYWFqbi42Fp38OBBHTlyRC6XS5Lkcrm0d+9eVVZWWvu43W45nU6lpKTYPWUAAGAY21/Byc7O1qpVq/Tuu++qadOm1ntmYmJiFBUVpZiYGI0aNUq5ubmKi4uT0+nUc889J5fLpV69ekmS0tLSlJKSoieeeELz5s1TeXm5pkyZouzs7Iu+SgMAAPBDtgfO66+/Lknq16+f3/qlS5fqqaeekiS9+uqrCgkJ0ciRI+XxeJSenq7XXnvN2rdRo0Zau3atxo4dK5fLpcaNGysrK0v5+fl2TxcAABjI9sDx+f7+R90iIyO1ZMkSLVmy5JL7tG3blk8cAACAq8J3UQEAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADjEDgAAMA4BA4AADAOgQMAAIxD4AAAAOMQOAAAwDgEDgAAMA6BAwAAjEPgAAAA4xA4AADAOAQOAAAwDoEDAACMQ+AAAADj3NCBs2TJErVr106RkZHq2bOntm3bFuwpAQCAm8ANGzirV69Wbm6upk+frp07d+ree+9Venq6Kisrgz01AABwg7thA2f+/PkaPXq0nn76aaWkpKiwsFDR0dH63e9+F+ypAQCAG1xosCdwMWfPnlVZWZny8vKsdSEhIUpNTVVJSclFb+PxeOTxeKzlkydPSpKOHz8ur9dr29y8Xq9qa2sV6g1RXb3DtnED7bvvvgv2FADbcB7CNKHnTgd7ClcktN6n2tp6fffddwoLC7N17FOnTkmSfD7fNY1zQwbOsWPHVFdXp/j4eL/18fHx+s///M+L3qagoEAzZ868YH1ycnJA5nizafFKsGcAgPMQJnkswOOfOnVK
MTExV337GzJwrkZeXp5yc3Ot5fr6eh0/flzNmzeXw2Hfv/Cqq6uVlJSkb775Rk6n07ZxAVw+zkMguAJ5Dvp8Pp06dUqJiYnXNM4NGTgtWrRQo0aNVFFR4be+oqJCCQkJF71NRESEIiIi/NbFxsYGaopyOp38YgWCjPMQCK5AnYPX8spNgxvyTcbh4eHq1q2biouLrXX19fUqLi6Wy+UK4swAAMDN4IZ8BUeScnNzlZWVpe7du+u+++7TggULdPr0aT399NPBnhoAALjB3bCB8+ijj+p///d/NW3aNJWXl6tz585av379BW88vt4iIiI0ffr0Cy6HAbh+OA+B4LoZzkGH71o/hwUAAHCDuSHfgwMAAHAtCBwAAGAcAgcAABiHwLmBtGvXTgsWLAj2NIAb1saNG+VwOFRVVfU39+NcAm4cM2bMUOfOna/7cQmca9CvXz9NmDAh2NMAfjR69+6tb7/91vojYMuWLbvoH/Tcvn27xowZc51nB8DhcGjNmjV+6yZNmuT3d+2ulxv2Y+Km8Pl8qqurU2goDzVwrcLDwy/518x/qGXLltdhNgAuR5MmTdSkSZPrflxjX8Hp16+fxo0bp8mTJysuLk4JCQmaMWOGtb2qqkrPPPOMWrZsKafTqf79+2v37t3W9qeeekrDhw/3G3PChAnq16+ftX3Tpk1auHChHA6HHA6HvvrqK+sl9A8//FDdunVTRESEtmzZoi+//FLDhg1TfHy8mjRpoh49euijjz66Do8EcH3169dPOTk5ysnJUUxMjFq0aKGpU6da3wx84sQJPfnkk2rWrJmio6P10EMP6dChQ9btv/76aw0dOlTNmjVT48aN1bFjR61bt06S/yWqjRs36umnn9bJkyetc7DhHP/hJarHHntMjz76qN8cvV6vWrRooRUrVkj6/i+lFxQUKDk5WVFRUbr33nv17//+7wF+pAD7XOtzniTNnj1brVq1UtOmTfXMM8/oxRdf9Lu0tH37dg0cOFAtWrRQTEyM+vbtq507d1rb27VrJ0kaMWKEHA6HtfzDS1RFRUWKjIy84DLz+PHj1b9/f2t5y5YteuCBBxQVFaWkpCSNGzdOp09f2TeuGxs4krR8+XI1btxYpaWlmjdvnvLz8+V2uyVJjzzyiCorK/Xhhx+qrKxMXbt21YABA3T8+PHLGnvhwoVyuVwaPXq0vv32W3377bdKSkqytr/44ot66aWXdODAAd1zzz2qqanR4MGDVVxcrL/85S8aNGiQhg4dqiNHjgTkvgPBtHz5coWGhmrbtm1auHCh5s+fr3/913+V9P0/Dnbs2KH33ntPJSUl8vl8Gjx4sLxeryQpOztbHo9Hmzdv1t69ezV37tyL/uuvd+/eWrBggZxOp3UOTpo06YL9MjMz9f7776umpsZat2HDBtXW1mrEiBGSpIKCAq1YsUKFhYXat2+fJk6cqMcff1ybNm0KxMMDBMS1POetXLlSc+bM0dy5c1VWVqY2bdro9ddf9xv/1KlTysrK0pYtW/TZZ5+pffv2Gjx4sE6dOiXp+wCSpKVLl+rbb7+1ln9owIABio2N1dtvv22tq6ur0+rVq5WZmSlJ+vLLLzVo0CCNHDlSe/bs0erVq7Vlyxbl5ORc2QPiM1Tfvn19999/v9+6Hj16+F544QXfJ5984nM6nb4zZ874bb/tttt8v/nNb3w+n8+XlZXlGzZsmN/28ePH+/r27et3jPHjx/vt8+c//9knybdmzZq/O8eOHTv6Fi9ebC23bdvW9+qrr/79OwfcwPr27evr0KGDr76+3lr3wgsv+Dp06OD74osvfJJ8n376qbXt2LFjvqioKN8f//hHn8/n83Xq1Mk3Y8aMi47dcH6dOHHC5/P5fEuXLvXFxMRcsN8PzyWv1+tr0aKFb8WKFdb2n//8575HH33U5/P5fGfOnPFFR0f7tm7d6jfGqFGjfD//+c+v+P4DwXCtz3k9e/b0ZWdn+23v06eP7957773kMevq6nxN
mzb1vf/++9Y6Sb533nnHb7/p06f7jTN+/Hhf//79reUNGzb4IiIirPN61KhRvjFjxviN8cknn/hCQkJ8f/3rXy85n/MZ/QrOPffc47fcunVrVVZWavfu3aqpqVHz5s2ta4NNmjTR4cOH9eWXX9py7O7du/st19TUaNKkSerQoYNiY2PVpEkTHThwgFdwYKRevXrJ4XBYyy6XS4cOHdL+/fsVGhqqnj17WtuaN2+uO++8UwcOHJAkjRs3TrNnz1afPn00ffp07dmz55rmEhoaqp/97GdauXKlJOn06dN69913rX8t/td//Zdqa2s1cOBAv98HK1assO33AXA9XMtz3sGDB3Xffff53f785YqKCo0ePVrt27dXTEyMnE6nampqrvh5LDMzUxs3btTRo0clff/q0ZAhQ6wPDOzevVvLli3zm2t6errq6+t1+PDhyz6O0e98DQsL81t2OByqr69XTU2NWrdurY0bN15wm4YHOCQkxHrPQIOGl9AvR+PGjf2WJ02aJLfbrZdfflm33367oqKi9NOf/lRnz5697DGBH4NnnnlG6enp+uCDD1RUVKSCggK98soreu655656zMzMTPXt21eVlZVyu92KiorSoEGDJMm6dPXBBx/olltu8bvdjfw9O8D5ruU573JkZWXpu+++08KFC9W2bVtFRETI5XJd8fNYjx49dNttt+kPf/iDxo4dq3feeUfLli2zttfU1OgXv/iFxo0bd8Ft27Rpc9nHMTpwLqVr164qLy9XaGio9Sao87Vs2VKff/6537pdu3b5/QCFh4errq7uso756aef6qmnnrKu+dfU1Oirr766qvkDN7rS0lK/5Ybr9SkpKTp37pxKS0vVu3dvSdJ3332ngwcPKiUlxdo/KSlJzz77rJ599lnl5eXpt7/97UUD53LPwd69eyspKUmrV6/Whx9+qEceecQ6l1NSUhQREaEjR46ob9++13K3gRvS5Tzn3Xnnndq+fbuefPJJa93576H59NNP9dprr2nw4MGSpG+++UbHjh3z2ycsLOyyzsnMzEytXLlSt956q0JCQjRkyBC/+e7fv1+333775d7FizL6EtWlpKamyuVyafjw4SoqKtJXX32lrVu36p//+Z+1Y8cOSVL//v21Y8cOrVixQocOHdL06dMvCJ527dqptLRUX331lY4dO6b6+vpLHrN9+/b6j//4D+3atUu7d+/WY4899jf3B25mR44cUW5urg4ePKi33npLixcv1vjx49W+fXsNGzZMo0eP1pYtW7R79249/vjjuuWWWzRs2DBJ339accOGDTp8+LB27typP//5z+rQocNFj9OuXTvV1NSouLhYx44dU21t7SXn9Nhjj6mwsFBut9u6PCVJTZs21aRJkzRx4kQtX75cX375pXbu3KnFixdr+fLl9j4wQBBcznPec889pzfffFPLly/XoUOHNHv2bO3Zs8fvUnP79u31+9//XgcOHFBpaakyMzMVFRXld6x27dqpuLhY5eXlOnHixCXnlJmZqZ07d2rOnDn66U9/6vdq6QsvvKCtW7cqJydHu3bt0qFDh/Tuu+9e8ZuMf5SB43A4tG7dOj344IN6+umndccddygjI0Nff/214uPjJUnp6emaOnWqJk+erB49eujUqVN+ZSt9f9mpUaNGSklJUcuWLf/mdcj58+erWbNm6t27t4YOHar09HR17do1oPcTCJYnn3xSf/3rX3XfffcpOztb48ePt/7w3tKlS9WtWzc9/PDDcrlc8vl8WrdunfWKSl1dnbKzs9WhQwcNGjRId9xxh1577bWLHqd379569tln9eijj6ply5aaN2/eJeeUmZmp/fv365ZbblGfPn38ts2aNUtTp05VQUGBddwPPvhAycnJNj0iQPBcznNeZmam8vLyNGnSJHXt2lWHDx/WU089pcjISGucN998UydOnFDXrl31xBNPaNy4cWrVqpXfsV555RW53W4lJSWpS5cul5zT7bffrvvuu097
9uzx+weH9P17iTZt2qQvvvhCDzzwgLp06aJp06YpMTHxyu637/w3mgDANejXr586d+7MVyUAN7mBAwcqISFBv//974M9lavyo3wPDgAA+H+1tbUqLCxUenq6GjVqpLfeeksfffSR9Xd0bkYEDgAAP3INl7HmzJmjM2fO6M4779Tbb7+t1NTUYE/tqnGJCgAAGOdH+SZjAABgNgIHAAAYh8ABAADGIXAAAIBxCBwAAGAcAgcAABiHwAEAAMYhcAAAgHEIHAAAYJz/Aweb96VhRz2FAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "articles_test_sample.sentiment.hist()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:29.685794Z",
     "iopub.status.busy": "2023-11-07T18:41:29.685422Z",
     "iopub.status.idle": "2023-11-07T18:41:29.698845Z",
     "shell.execute_reply": "2023-11-07T18:41:29.697807Z",
     "shell.execute_reply.started": "2023-11-07T18:41:29.685758Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>id</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5176</th>\n",
       "      <td>В немецком городе Гамбург состоялся экономичес...</td>\n",
       "      <td>7230</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5418</th>\n",
       "      <td>Глава правительства Бакытжан Сагинтаев провел ...</td>\n",
       "      <td>7472</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>856</th>\n",
       "      <td>В среду, 25 января, первый заместитель п...</td>\n",
       "      <td>2910</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8200</th>\n",
       "      <td>Kapital.kz 19 января 2017, 17:11 \\n\\nkapital.k...</td>\n",
       "      <td>10254</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1793</th>\n",
       "      <td>Высококачественные подделки тенге в Казахстане...</td>\n",
       "      <td>3847</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7678</th>\n",
       "      <td>Все отрасли ТЭК. Россия и Мир 20:41, 5.1.17 \\n...</td>\n",
       "      <td>9732</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4840</th>\n",
       "      <td>\\nКАЗАНЬ, 7 октября. /Корр. ТАСС Алексей Угаро...</td>\n",
       "      <td>6894</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5576</th>\n",
       "      <td>О реализации инвестпроектов в агропромышленном...</td>\n",
       "      <td>7630</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7326</th>\n",
       "      <td>Ранее торговый центр был выставлен на торги че...</td>\n",
       "      <td>9380</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5351</th>\n",
       "      <td>Алматы. 26 декабря. Центр информации. Руководс...</td>\n",
       "      <td>7405</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   text     id  sentiment\n",
       "5176  В немецком городе Гамбург состоялся экономичес...   7230          1\n",
       "5418  Глава правительства Бакытжан Сагинтаев провел ...   7472          1\n",
       "856         В среду, 25 января, первый заместитель п...   2910          2\n",
       "8200  Kapital.kz 19 января 2017, 17:11 \\n\\nkapital.k...  10254          2\n",
       "1793  Высококачественные подделки тенге в Казахстане...   3847          1\n",
       "7678  Все отрасли ТЭК. Россия и Мир 20:41, 5.1.17 \\n...   9732          1\n",
       "4840  \\nКАЗАНЬ, 7 октября. /Корр. ТАСС Алексей Угаро...   6894          2\n",
       "5576  О реализации инвестпроектов в агропромышленном...   7630          1\n",
       "7326  Ранее торговый центр был выставлен на торги че...   9380          1\n",
       "5351  Алматы. 26 декабря. Центр информации. Руководс...   7405          2"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "articles_test_sample.sentiment = articles_test_sample.sentiment\\\n",
    "    .apply(lambda x: sentiment_map[x])\n",
    "articles_test_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:29.700106Z",
     "iopub.status.busy": "2023-11-07T18:41:29.699848Z",
     "iopub.status.idle": "2023-11-07T18:41:29.708341Z",
     "shell.execute_reply": "2023-11-07T18:41:29.707604Z",
     "shell.execute_reply.started": "2023-11-07T18:41:29.700078Z"
    }
   },
   "outputs": [],
   "source": [
    "X_articles_test = articles_test_sample.text\n",
    "y_articles_test = articles_test_sample.sentiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:41:29.709564Z",
     "iopub.status.busy": "2023-11-07T18:41:29.709312Z",
     "iopub.status.idle": "2023-11-07T18:42:47.652893Z",
     "shell.execute_reply": "2023-11-07T18:42:47.652100Z",
     "shell.execute_reply.started": "2023-11-07T18:41:29.709541Z"
    }
   },
   "outputs": [],
   "source": [
    "articles_test_dataset = SentimentAnalysisDataset(X_articles_test, y_articles_test, \n",
    "                                                 tokenizer)\n",
    "rubert_articles_test_dataset = SentimentAnalysisDataset(X_articles_test, y_articles_test, \n",
    "                                                 rubert_tokenizer, max_length=512)\n",
    "\n",
    "free_memory()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:42:47.654155Z",
     "iopub.status.busy": "2023-11-07T18:42:47.653887Z",
     "iopub.status.idle": "2023-11-07T18:42:47.659435Z",
     "shell.execute_reply": "2023-11-07T18:42:47.658512Z",
     "shell.execute_reply.started": "2023-11-07T18:42:47.654132Z"
    }
   },
   "outputs": [],
   "source": [
    "articles_test_loader = DataLoader(articles_test_dataset, batch_size=10)\n",
    "rubert_articles_test_loader = DataLoader(rubert_articles_test_dataset, batch_size=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:42:47.661296Z",
     "iopub.status.busy": "2023-11-07T18:42:47.660660Z",
     "iopub.status.idle": "2023-11-07T18:48:45.452113Z",
     "shell.execute_reply": "2023-11-07T18:48:45.451134Z",
     "shell.execute_reply.started": "2023-11-07T18:42:47.661264Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Articles test accuracy from small text model is 0.49367704280155644\n",
      "Articles test f1 score from small text model is 0.3540032465547715\n",
      "CPU times: user 5min 23s, sys: 18.7 s, total: 5min 42s\n",
      "Wall time: 5min 57s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(st_model, articles_test_loader)\n",
    "print(f\"Articles test accuracy from small text model is {accuracy}\")\n",
    "print(f\"Articles test f1 score from small text model is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:48:45.453547Z",
     "iopub.status.busy": "2023-11-07T18:48:45.453276Z",
     "iopub.status.idle": "2023-11-07T18:54:44.081535Z",
     "shell.execute_reply": "2023-11-07T18:54:44.080458Z",
     "shell.execute_reply.started": "2023-11-07T18:48:45.453513Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Articles test accuracy from large text model is 0.7334630350194552\n",
      "Articles test f1 score from large text model is 0.7267055779946027\n",
      "CPU times: user 5min 24s, sys: 18.7 s, total: 5min 42s\n",
      "Wall time: 5min 58s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(lt_model, articles_test_loader)\n",
    "print(f\"Articles test accuracy from large text model is {accuracy}\")\n",
    "print(f\"Articles test f1 score from large text model is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:54:44.083171Z",
     "iopub.status.busy": "2023-11-07T18:54:44.082885Z",
     "iopub.status.idle": "2023-11-07T18:56:15.710761Z",
     "shell.execute_reply": "2023-11-07T18:56:15.709817Z",
     "shell.execute_reply.started": "2023-11-07T18:54:44.083146Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Articles test accuracy from large text RuBert is 0.6381322957198443\n",
      "Articles test f1 score from large text RuBert is 0.5821312898429282\n",
      "CPU times: user 1min 28s, sys: 2.8 s, total: 1min 31s\n",
      "Wall time: 1min 31s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_bert_model(lt_rubert, rubert_articles_test_loader)\n",
    "print(f\"Articles test accuracy from large text RuBert is {accuracy}\")\n",
    "print(f\"Articles test f1 score from large text RuBert is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:56:15.712268Z",
     "iopub.status.busy": "2023-11-07T18:56:15.711972Z",
     "iopub.status.idle": "2023-11-07T18:57:46.763508Z",
     "shell.execute_reply": "2023-11-07T18:57:46.762526Z",
     "shell.execute_reply.started": "2023-11-07T18:56:15.712243Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Articles test accuracy from small text RuBert is 0.5\n",
      "Articles test f1 score from small text RuBert is 0.50463469503104\n",
      "CPU times: user 1min 28s, sys: 2.78 s, total: 1min 31s\n",
      "Wall time: 1min 31s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_bert_model(st_rubert, rubert_articles_test_loader)\n",
    "print(f\"Articles test accuracy from small text RuBert is {accuracy}\")\n",
    "print(f\"Articles test f1 score from small text RuBert is {f1}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:57:46.765024Z",
     "iopub.status.busy": "2023-11-07T18:57:46.764768Z",
     "iopub.status.idle": "2023-11-07T18:57:46.769264Z",
     "shell.execute_reply": "2023-11-07T18:57:46.768306Z",
     "shell.execute_reply.started": "2023-11-07T18:57:46.765001Z"
    }
   },
   "outputs": [],
   "source": [
    "articles_test_loader = DataLoader(articles_test_dataset, batch_size=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-11-07T18:57:46.770716Z",
     "iopub.status.busy": "2023-11-07T18:57:46.770439Z",
     "iopub.status.idle": "2023-11-07T19:13:29.170832Z",
     "shell.execute_reply": "2023-11-07T19:13:29.169791Z",
     "shell.execute_reply.started": "2023-11-07T18:57:46.770693Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Articles test accuracy from small text medium sized model is 0.4474708171206226\n",
      "Articles test f1 score from small text medium sized model is 0.4100246130678619\n",
      "CPU times: user 15min 3s, sys: 15.1 s, total: 15min 18s\n",
      "Wall time: 15min 42s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "accuracy, f1 = evaluate_model(st_medium_model, articles_test_loader)\n",
    "print(f\"Articles test accuracy from small text medium sized model is {accuracy}\")\n",
    "print(f\"Articles test f1 score from small text medium sized model is {f1}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
